Defining the input and output files
# Clean the working environment
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
empirical_species <- "German Shepherd"
document_title <- paste(empirical_species," Pipeline Results")
MAF_pruning_used <- FALSE
N_e <- 50
document_sub <- paste("MAF 0.01 used, but only for H_E-computation, N_e=", N_e)
# if (MAF_pruning_used == FALSE) {
# document_sub <- paste("No MAF-based pruning used, N_e =", N_e)
#
#
# } else {
# document_sub <- paste("MAF-based pruning used, N_e =", N_e)
# }
############################################
# Parameters used for displaying the result
############################################
ROH_frequency_decimals <- 1
H_e_values_decimals <- 5
F_ROH_values_decimals <- 3
####################################
# Defining the input files
####################################
Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")
Sweep_test_dir <- Sys.getenv("Sweep_test_dir")
###############
## Empirical ###
###############
### ROH hotspots ###
Empirical_data_ROH_hotspots_dir <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Empirical_data_F_ROH_dir <- Sys.getenv("Empirical_data_F_ROH_dir")
### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")
###############
## Simulated ###
###############
### Causative variant ###
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")
pruned_replicates_count_dir <- Sys.getenv("pruned_replicates_count_dir")
Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
causative_variant_windows_H_e_dir <- Sys.getenv("causative_variant_windows_H_e_dir")
### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")
Selection_model_ROH_hotspots_dir <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Neutral_model_F_ROH_dir <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir <- Sys.getenv("Selection_model_F_ROH_dir")
### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")
histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue"
selection_model_color <- "purple"
output_dir <- Sys.getenv("Pipeline_results_output_dir")
# Sys.getenv()
# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
| Selection_coefficient | Fixation_probability | Avg_Fixation_time | Min_fixation_time | Max_fixation_time | |
|---|---|---|---|---|---|
| 1 | s=0.05 | 0.2 | 33.80 | 27 | 39 |
| 3 | s=0.2 | 16.3 | 32.40 | 22 | 39 |
| 2 | s=0.1 | 1.5 | 31.85 | 18 | 39 |
| 4 | s=0.3 | 27.8 | 27.55 | 18 | 39 |
| 5 | s=0.4 | 40.8 | 20.95 | 15 | 29 |
| 6 | s=0.5 | 40.0 | 16.65 | 10 | 26 |
| 7 | s=0.6 | 35.7 | 11.45 | 8 | 16 |
| 8 | s=0.7 | 46.5 | 10.25 | 7 | 13 |
| 9 | s=0.8 | 45.5 | 7.45 | 4 | 10 |
| Selection_coefficient | Avg_Length_Mb | Avg_window_freq | Min_freq | Max_freq | Avg_freq_variant_100k_window | |
|---|---|---|---|---|---|---|
| 1 | s=0.05 | 3.655 | 71.0 | 32 | 100 | 71.9 |
| 3 | s=0.2 | 4.430 | 78.9 | 34 | 100 | 79.3 |
| 4 | s=0.3 | 4.435 | 83.1 | 38 | 100 | 84.0 |
| 5 | s=0.4 | 4.625 | 78.1 | 32 | 100 | 78.2 |
| 2 | s=0.1 | 4.775 | 73.2 | 12 | 100 | 73.9 |
| 6 | s=0.5 | 5.040 | 92.9 | 56 | 100 | 93.6 |
| 7 | s=0.6 | 5.875 | 87.2 | 48 | 100 | 88.0 |
| 8 | s=0.7 | 6.220 | 93.2 | 36 | 100 | 94.4 |
| 9 | s=0.8 | 7.895 | 88.9 | 28 | 100 | 89.9 |
Confidence level <=> konfidensgrad
# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
# Calculate standard error
n <- length(observed_data)
standard_deviation <- sd(observed_data)
standard_error <- standard_deviation / sqrt(n - 1)
# Calculate confidence interval based on standard error
alpha <- (1 - confidence_level) / 2
margin_of_error <- qnorm(1 - alpha) * standard_error
mean_estimate <- mean(observed_data)
# Calculate the percentiles for the confidence interval
confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
# Return confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}
# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#
# # Function to calculate the mean for each bootstrap sample
# compute_bootstrap_mean_fun <- function(observed_data_urn) {
# bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
# bootstrap_estimate <- mean(bootstrap_dataset)
# return(bootstrap_estimate)
# }
#
# # Perform bootstrap resampling
# bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#
# # Calculate the percentiles for the confidence interval
# alpha <- (1 - confidence_level) / 2
# confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
# confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#
# # Return the confidence interval
# return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }
## ROH-hotspot selection testing results://n
| Model | Avg_ROH_hotspot_threshold |
|---|---|
| Neutral | 40.0 |
| s=0.1 | 42.6 |
| s=0.3 | 45.1 |
| s=0.05 | 46.6 |
| s=0.2 | 47.0 |
| s=0.4 | 47.0 |
| s=0.6 | 52.0 |
| s=0.5 | 54.2 |
| s=0.7 | 58.2 |
| s=0.8 | 60.6 |
| Empirical | 75.0 |
## Overall Average Avg_F_ROH for german_shepherd : 0.2753012 //n
## Point estimate F_ROH across all 20 simulations: 0.3231565 //n
## [1] "Bootstrap 95% Confidence Interval: [0.302707382298313, 0.343605637701687]"
## Point estimate F_ROH across all 20 simulations for selection_model_s005_chr3 : 0.3789376 //n[1] "Bootstrap 95% Confidence Interval: [0.356936709259486, 0.400938510740514]"
## Point estimate F_ROH across all 20 simulations for selection_model_s01_chr3 : 0.3475749 //n[1] "Bootstrap 95% Confidence Interval: [0.329352061370301, 0.365797778629699]"
## Point estimate F_ROH across all 20 simulations for selection_model_s02_chr3 : 0.3852102 //n[1] "Bootstrap 95% Confidence Interval: [0.360506697169921, 0.409913642830079]"
## Point estimate F_ROH across all 20 simulations for selection_model_s03_chr3 : 0.382973 //n[1] "Bootstrap 95% Confidence Interval: [0.356057264547997, 0.409888755452003]"
## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr3 : 0.4022861 //n[1] "Bootstrap 95% Confidence Interval: [0.379804687863133, 0.424767512136866]"
## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr3 : 0.438943 //n[1] "Bootstrap 95% Confidence Interval: [0.407964226284795, 0.469921813715205]"
## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr3 : 0.4301045 //n[1] "Bootstrap 95% Confidence Interval: [0.398231547108479, 0.461977472891521]"
## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr3 : 0.4466082 //n[1] "Bootstrap 95% Confidence Interval: [0.419672277441459, 0.473544062558542]"
## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr3 : 0.4687358 //n[1] "Bootstrap 95% Confidence Interval: [0.432117907312471, 0.505353732687529]"
| Model | F_ROH | Lower_CI | Upper_CI |
|---|---|---|---|
| Empirical | 0.275 | NA | NA |
| Neutral | 0.323 | 0.303 | 0.344 |
| s=0.1 | 0.348 | 0.329 | 0.366 |
| s=0.05 | 0.379 | 0.357 | 0.401 |
| s=0.3 | 0.383 | 0.356 | 0.410 |
| s=0.2 | 0.385 | 0.361 | 0.410 |
| s=0.4 | 0.402 | 0.380 | 0.425 |
| s=0.6 | 0.430 | 0.398 | 0.462 |
| s=0.5 | 0.439 | 0.408 | 0.470 |
| s=0.7 | 0.447 | 0.420 | 0.474 |
| s=0.8 | 0.469 | 0.432 | 0.505 |
## Models where empirical F_ROH is within CI:
## character(0)
##
## Models where empirical F_ROH is outside CI:
## [1] "s=0.05" "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6"
## [8] "s=0.7" "s=0.8" "Neutral"
## Uncommented because change of analysis
## Point estimate H_e across all 20 simulations for s005_chr3 : 0.2029054 //n[1] "Bootstrap 95% Confidence Interval: [0.157277175652386, 0.248533565319362]"
## Point estimate H_e across all 20 simulations for s01_chr3 : 0.1686049 //n[1] "Bootstrap 95% Confidence Interval: [0.123005410708793, 0.214204429478017]"
## Point estimate H_e across all 20 simulations for s02_chr3 : 0.1434164 //n[1] "Bootstrap 95% Confidence Interval: [0.106661691025297, 0.180171037422669]"
## Point estimate H_e across all 20 simulations for s03_chr3 : 0.1834084 //n[1] "Bootstrap 95% Confidence Interval: [0.12586706169554, 0.240949803094704]"
## Point estimate H_e across all 20 simulations for s04_chr3 : 0.2695743 //n[1] "Bootstrap 95% Confidence Interval: [0.211933288351644, 0.327215260815894]"
## Point estimate H_e across all 20 simulations for s05_chr3 : 0.1402356 //n[1] "Bootstrap 95% Confidence Interval: [0.0879059094927421, 0.192565276654444]"
## Point estimate H_e across all 20 simulations for s06_chr3 : 0.2447953 //n[1] "Bootstrap 95% Confidence Interval: [0.178446160700846, 0.311144529886949]"
## Point estimate H_e across all 20 simulations for s07_chr3 : 0.1724229 //n[1] "Bootstrap 95% Confidence Interval: [0.116795880599175, 0.228049850340113]"
## Point estimate H_e across all 20 simulations for s08_chr3 : 0.2088567 //n[1] "Bootstrap 95% Confidence Interval: [0.145872583512026, 0.271840780204169]"
| Model | H_e_5th_percentile | Lower_CI | Upper_CI |
|---|---|---|---|
| s05_chr3 | 0.06171 | 0.05179 | 0.07163 |
| s07_chr3 | 0.06393 | 0.05149 | 0.07638 |
| s005_chr3 | 0.06445 | 0.05364 | 0.07527 |
| s02_chr3 | 0.06734 | 0.05849 | 0.07619 |
| s04_chr3 | 0.06806 | 0.05413 | 0.08198 |
| s06_chr3 | 0.06986 | 0.05725 | 0.08247 |
| Neutral | 0.07060 | 0.05843 | 0.08276 |
| s01_chr3 | 0.07064 | 0.05838 | 0.08291 |
| s03_chr3 | 0.08388 | 0.06884 | 0.09891 |
| s08_chr3 | 0.08788 | 0.07636 | 0.09940 |
| Empirical | NA | NA | NA |
##
## ROH-hotspot threshold comparison between the different datasets
| Model | Avg_ROH_hotspot_threshold |
|---|---|
| Neutral | 40.0 |
| s=0.1 | 42.6 |
| s=0.3 | 45.1 |
| s=0.05 | 46.6 |
| s=0.2 | 47.0 |
| s=0.4 | 47.0 |
| s=0.6 | 52.0 |
| s=0.5 | 54.2 |
| s=0.7 | 58.2 |
| s=0.8 | 60.6 |
| Empirical | 75.0 |
| Model | F_ROH | Lower_CI | Upper_CI |
|---|---|---|---|
| Empirical | 0.275 | NA | NA |
| Neutral | 0.323 | 0.303 | 0.344 |
| s=0.1 | 0.348 | 0.329 | 0.366 |
| s=0.05 | 0.379 | 0.357 | 0.401 |
| s=0.3 | 0.383 | 0.356 | 0.410 |
| s=0.2 | 0.385 | 0.361 | 0.410 |
| s=0.4 | 0.402 | 0.380 | 0.425 |
| s=0.6 | 0.430 | 0.398 | 0.462 |
| s=0.5 | 0.439 | 0.408 | 0.470 |
| s=0.7 | 0.447 | 0.420 | 0.474 |
| s=0.8 | 0.469 | 0.432 | 0.505 |
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.05843441"
| Name | Under_selection | Window_based_Average_H_e |
|---|---|---|
| Hotspot_chr3_window_1 | No | 0.10706 |
| Hotspot_chr3_window_3 | No | 0.12371 |
| Hotspot_chr3_window_2 | No | 0.14453 |
| Hotspot_chr17_window_2 | No | 0.14866 |
| Hotspot_chr17_window_1 | No | 0.15285 |
| Hotspot_chr19_window_1 | No | 0.16270 |
## [1] "ROH-hotspots under selection:"
| Name | Under_selection | Window_based_Average_H_e |
|---|
| Model | H_e | Lower_CI | Upper_CI | Under_Selection |
|---|---|---|---|---|
| Neutral | 0.07060 | 0.05843 | 0.08276 | No |
| s=0.5 | 0.14024 | 0.08791 | 0.19257 | No |
| s=0.2 | 0.14342 | 0.10666 | 0.18017 | No |
| s=0.1 | 0.16860 | 0.12301 | 0.21420 | No |
| s=0.7 | 0.17242 | 0.11680 | 0.22805 | No |
| s=0.3 | 0.18341 | 0.12587 | 0.24095 | No |
| s=0.05 | 0.20291 | 0.15728 | 0.24853 | No |
| s=0.8 | 0.20886 | 0.14587 | 0.27184 | No |
| s=0.6 | 0.24480 | 0.17845 | 0.31114 | No |
| s=0.4 | 0.26957 | 0.21193 | 0.32722 | No |
*** Behöver bootstrap av 5th percentiles från neutral model
| Model | Lengths_Mb | Avg_ROH_Freq |
|---|---|---|
| Hotspot_chr3_window_1 | 10.900 | 81.3 |
| s=0.8 | 7.895 | 88.9 |
| s=0.7 | 6.220 | 93.2 |
| s=0.6 | 5.875 | 87.2 |
| s=0.5 | 5.040 | 92.9 |
| s=0.1 | 4.775 | 73.2 |
| s=0.4 | 4.625 | 78.1 |
| s=0.3 | 4.435 | 83.1 |
| s=0.2 | 4.430 | 78.9 |
| Hotspot_chr19_window_1 | 4.400 | 75.6 |
| s=0.05 | 3.655 | 71.0 |
| Hotspot_chr3_window_2 | 3.200 | 76.3 |
| Hotspot_chr3_window_3 | 2.700 | 77.6 |
| Hotspot_chr17_window_1 | 2.000 | 76.4 |
| Hotspot_chr17_window_2 | 0.600 | 76.1 |
## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## character(0)